In [1]:
import numpy as np
import pandas as pd
# import seaborn as sns
# import matplotlib.pyplot as plt
import altair as alt
In [2]:
# Record the JupyterLab version used for this session (shell escape; output shown below).
!jupyter lab --version
1.0.0a5
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [3]:
# Load the 2014 work-zone results table and preview its first five rows.
df = pd.read_csv(filepath_or_buffer='wz_results_data/wz_results2014.csv')
df.head(n=5)
Out[3]:
wz_id avg_wz_delay max_wz_delay total_wz_delay avg_am_delay max_am_delay total_am_delay wz_am_lottr avg_mid_delay max_mid_delay ... road category direction speed wz_start wz_end start_mm end_mm tmc_count wz_duration
0 57973 0.257360 7.943611 1164.946034 0.885616 7.943611 290.379914 1.054281 0.490151 3.868032 ... I-94 Shoulder Closure Eastbound 70 2014-01-06T12:00:00Z 2014-01-12T23:00:00Z 257.086293 270.882097 13 155.0
1 58121_2 0.730808 20.399221 2551.476425 1.540802 7.200835 403.235553 1.097282 0.941530 9.681833 ... I-94 Shoulder Closure Eastbound 70 2014-01-06T12:00:00Z 2014-01-12T23:00:00Z 225.710404 228.987383 31 155.0
2 58122_2 0.311223 7.107770 1435.737326 0.925106 5.488907 284.193455 1.050463 0.574029 5.130233 ... I-94 Shoulder Closure Eastbound 70 2014-01-06T12:00:00Z 2014-01-12T23:00:00Z 243.260280 257.456120 13 155.0
3 58123 0.587366 19.850262 2330.688297 1.417203 8.341462 396.314395 1.072203 0.659571 9.789736 ... I-94 Shoulder Closure Eastbound 70 2014-01-06T12:00:00Z 2014-01-12T23:00:00Z 231.756632 243.780846 28 155.0
4 58124 2.323270 41.551808 10091.481720 7.360915 39.011252 2474.326730 1.636411 4.234357 41.551808 ... I-75 Shoulder Closure Northbound 70 2014-01-06T13:00:00Z 2014-01-12T23:00:00Z 49.487994 53.061950 37 154.0

5 rows × 37 columns

In [4]:
# (rows, columns) of the table as loaded from the CSV.
df.shape
Out[4]:
(638, 37)
In [5]:
# Keep the delay/queue metrics plus the work-zone descriptor columns, and drop
# every row that has a missing value in any of them.
df = df.loc[:, [
    'total_wz_delay', 'avg_wz_delay', 'max_wz_delay',
    'max_queue_duration', 'total_queue_duration', 'num_queue', 'max_queue_length',
    'wz_id', 'category', 'county', 'road', 'direction', 'speed',
    'wz_start', 'wz_end', 'start_mm', 'end_mm', 'tmc_count', 'wz_duration',
]].dropna()

# Pin the category levels explicitly; any value outside this list becomes NaN.
df['category'] = pd.Categorical(
    df['category'],
    categories=[
        'Shoulder Closure',
        'Single Lane Closure',
        'Double Lane Closure',
        'Multiple Lane Closure',
    ],
)

# Rank by total_wz_delay (largest first). The first reset_index discards the old
# labels; the second one exposes the new 0..n-1 position as an 'index' column.
df2 = (
    df.sort_values('total_wz_delay', ascending=False)
      .reset_index(drop=True)
      .reset_index()
)
In [6]:
# Preview the ranked frame; the 'index' column is the rank by total_wz_delay.
df2.head()
Out[6]:
index total_wz_delay avg_wz_delay max_wz_delay max_queue_duration total_queue_duration num_queue max_queue_length wz_id category county road direction speed wz_start wz_end start_mm end_mm tmc_count wz_duration
0 0 10091.481720 2.323270 41.551808 310.0 1795.0 29.0 5.933702 58124 Shoulder Closure Macomb County I-75 Northbound 70 2014-01-06T13:00:00Z 2014-01-12T23:00:00Z 49.487994 53.061950 37 154.0
1 1 8899.264924 0.551742 73.962062 705.0 1375.0 4.0 7.256744 70000 Single Lane Closure Kent County I-96 Westbound 70 2014-08-23T02:00:00Z 2014-08-25T09:00:00Z 54.000000 50.000000 6 55.0
2 2 5384.293770 1.936029 36.115275 750.0 1470.0 12.0 6.053285 67988 Double Lane Closure Wayne County I-75 Southbound 70 2014-07-12T01:00:00Z 2014-07-14T09:00:00Z 39.000000 33.000000 23 56.0
3 3 4155.726330 0.490321 61.052171 190.0 960.0 22.0 5.124032 59599_1 Shoulder Closure Wayne County I-75 Northbound 70 2014-02-17T14:00:00Z 2014-02-21T20:00:00Z 51.674659 61.102122 47 102.0
4 4 4097.812867 1.103452 49.108797 590.0 1055.0 9.0 6.268528 65627 Double Lane Closure Wayne County I-75 Northbound 70 2014-05-31T00:00:00Z 2014-06-02T09:00:00Z 34.011807 38.499262 20 57.0
In [7]:
# Quick look: rank position ('index') against total delay, coloured by category.
# Field types are left for Altair to infer from the DataFrame.
alt.Chart(df2).mark_point().encode(
    alt.X('index'),
    alt.Y('total_wz_delay'),
    alt.Color('category'),
    tooltip=['wz_id', 'category', 'county', 'road', 'direction'],
)
Out[7]:
In [8]:
# Base layer reused by the charts below: rank position vs. total delay, both quantitative.
base = (
    alt.Chart(df2)
    .mark_circle()
    .encode(alt.X('index:Q'), alt.Y('total_wz_delay:Q'))
    .properties(width=1000, height=300)
)
In [9]:
# Render the base scatter.
base
Out[9]:
In [10]:
# Add colour by category, hover tooltips and a fixed 0.8 opacity on top of `base`.
chart = (
    base
    .encode(
        alt.Color('category:N'),
        opacity=alt.value(0.8),
        tooltip=['wz_id', 'category', 'county', 'road', 'direction'],
    )
    .properties(title='')
)
In [11]:
# Render the coloured scatter.
chart
Out[11]:
In [12]:
# Facet into one 250x250 panel per category and title the figure '2014'.
chart = (
    chart
    .encode(alt.Column('category:N'))
    .properties(width=250, height=250, title='2014')
)
In [13]:
# Render the faceted chart.
chart
Out[13]:
In [16]:
# Presentation styling, displayed only (the result is not assigned back to `chart`):
# header title set to None, blue facet labels, and a large centred blue chart title.
(
    chart
    .configure_header(
        labelColor='blue',
        title=None,
        titleAnchor='middle',
        titleFont='Courier',
        titleFontSize=34,
    )
    .configure_title(anchor='middle', color='blue', fontSize=34)
)
Out[16]:
In [ ]:
 
In [ ]:
# NOTE: this cell has no execution count in the saved notebook. Running it re-reads the
# raw 2014 CSV into `df`, replacing the column-filtered frame built in the earlier cell.
df = pd.read_csv('wz_results_data/wz_results2014.csv')
df.head()
In [ ]:
 
In [ ]:
 
In [148]:
# Display `chart` with an axis-title font size of 90 (result is not stored).
chart.configure_axis(titleFontSize=90)
Out[148]:
In [177]:
# NOTE: this PNG export failed in the recorded run. The traceback below shows Altair's
# save path starting a selenium Chrome webdriver, which raised WebDriverException
# because the 'chromedriver' executable was not on PATH.
chart.save('chart.png', scale_factor=2.0)
---------------------------------------------------------------------------
FileNotFoundError                         Traceback (most recent call last)
~\Anaconda3\lib\site-packages\selenium\webdriver\common\service.py in start(self)
     75                                             stderr=self.log_file,
---> 76                                             stdin=PIPE)
     77         except TypeError:

~\Anaconda3\lib\subprocess.py in __init__(self, args, bufsize, executable, stdin, stdout, stderr, preexec_fn, close_fds, shell, cwd, env, universal_newlines, startupinfo, creationflags, restore_signals, start_new_session, pass_fds, encoding, errors)
    708                                 errread, errwrite,
--> 709                                 restore_signals, start_new_session)
    710         except:

~\Anaconda3\lib\subprocess.py in _execute_child(self, args, executable, preexec_fn, close_fds, pass_fds, cwd, env, startupinfo, creationflags, shell, p2cread, p2cwrite, c2pread, c2pwrite, errread, errwrite, unused_restore_signals, unused_start_new_session)
    996                                          os.fspath(cwd) if cwd is not None else None,
--> 997                                          startupinfo)
    998             finally:

FileNotFoundError: [WinError 2] The system cannot find the file specified

During handling of the above exception, another exception occurred:

WebDriverException                        Traceback (most recent call last)
<ipython-input-177-0cccca577e4a> in <module>()
----> 1 chart.save('chart.png', scale_factor=2.0)

~\Anaconda3\lib\site-packages\altair\vegalite\v3\api.py in save(self, fp, format, override_data_transformer, scale_factor, vegalite_version, vega_version, vegaembed_version, **kwargs)
    472         if override_data_transformer:
    473             with data_transformers.enable('default', max_rows=None):
--> 474                 result = save(**kwds)
    475         else:
    476             result = save(**kwds)

~\Anaconda3\lib\site-packages\altair\utils\save.py in save(chart, fp, vega_version, vegaembed_version, format, mode, vegalite_version, embed_options, json_kwds, webdriver, scale_factor)
    100                                         vegaembed_version=vegaembed_version,
    101                                         webdriver=webdriver,
--> 102                                         scale_factor=scale_factor)
    103         if format == 'png':
    104             write_file_or_filename(fp, mimebundle['image/png'], mode='wb')

~\Anaconda3\lib\site-packages\altair\utils\mimebundle.py in spec_to_mimebundle(spec, format, mode, vega_version, vegaembed_version, vegalite_version, **kwargs)
     54                               vega_version=vega_version,
     55                               vegaembed_version=vegaembed_version,
---> 56                               vegalite_version=vegalite_version, **kwargs)
     57         if format == 'png':
     58             render = base64.b64decode(render.split(',', 1)[1].encode())

~\Anaconda3\lib\site-packages\altair\utils\headless.py in compile_spec(spec, format, mode, vega_version, vegaembed_version, vegalite_version, scale_factor, driver_timeout, webdriver)
    152             webdriver_options.add_argument('--no-sandbox')
    153 
--> 154     driver = webdriver_class(options=webdriver_options)
    155 
    156     try:

~\Anaconda3\lib\site-packages\selenium\webdriver\chrome\webdriver.py in __init__(self, executable_path, port, options, service_args, desired_capabilities, service_log_path, chrome_options, keep_alive)
     71             service_args=service_args,
     72             log_path=service_log_path)
---> 73         self.service.start()
     74 
     75         try:

~\Anaconda3\lib\site-packages\selenium\webdriver\common\service.py in start(self)
     81                 raise WebDriverException(
     82                     "'%s' executable needs to be in PATH. %s" % (
---> 83                         os.path.basename(self.path), self.start_error_message)
     84                 )
     85             elif err.errno == errno.EACCES:

WebDriverException: Message: 'chromedriver' executable needs to be in PATH. Please see https://sites.google.com/a/chromium.org/chromedriver/home
In [149]:
# !conda list
In [1]:
import altair as alt
from vega_datasets import data

# Reference example on the cars dataset. Note that this rebinds `source` and `chart`.
source = data.cars.url

chart = (
    alt.Chart(source)
    .mark_point()
    .encode(
        alt.X('Horsepower:Q'),
        alt.Y('Miles_per_Gallon:Q'),
        alt.Color('Origin:N'),
        alt.Column('Origin:N'),
    )
    .properties(width=180, height=180)
)

# Displayed only: the header-configured copy is not assigned back to `chart`.
chart.configure_header(
    labelColor='red',
    labelFontSize=14,
    titleColor='green',
    titleFontSize=14,
)
Out[1]:
In [5]:
# Write the current `chart` to chart.png with scale_factor=3.0.
chart.save('chart.png', scale_factor=3.0)
In [3]:
# Write the current `chart` to chart2.svg with scale_factor=2.0.
chart.save('chart2.svg', scale_factor=2.0)
In [ ]:
 
In [ ]:
 
In [158]:
# Fetch the cars JSON from its URL into a DataFrame and preview the first rows.
pd.read_json(data.cars.url).head()
Out[158]:
Acceleration Cylinders Displacement Horsepower Miles_per_Gallon Name Origin Weight_in_lbs Year
0 12.0 8 307.0 130.0 18.0 chevrolet chevelle malibu USA 3504 1970-01-01
1 11.5 8 350.0 165.0 15.0 buick skylark 320 USA 3693 1970-01-01
2 11.0 8 318.0 150.0 18.0 plymouth satellite USA 3436 1970-01-01
3 12.0 8 304.0 150.0 16.0 amc rebel sst USA 3433 1970-01-01
4 10.5 8 302.0 140.0 17.0 ford torino USA 3449 1970-01-01
In [159]:
# Same facet layout as the cars example, applied to the ranked work-zone frame.
chart = (
    alt.Chart(df2)
    .mark_point()
    .encode(
        alt.X('index:Q'),
        alt.Y('total_wz_delay:Q'),
        alt.Color('category:N'),
        alt.Column('category:N'),
    )
    .properties(width=180, height=180)
)
In [160]:
# Render the faceted work-zone chart.
chart
Out[160]:
In [ ]:
 
In [ ]:
 
In [ ]:
 

Combined years analysis

In [3]:
# Load the combined-years results table (rebinds `df`).
df = pd.read_csv(filepath_or_buffer='wz_results_data/wz_results_combi.csv')
In [4]:
# Same column selection as the single-year analysis, plus coordinates and year;
# rows with a missing value in any kept column are dropped.
df = df.loc[:, [
    'total_wz_delay', 'avg_wz_delay', 'max_wz_delay',
    'max_queue_duration', 'total_queue_duration', 'num_queue', 'max_queue_length',
    'wz_id', 'category', 'county', 'road', 'direction', 'speed',
    'wz_start', 'wz_end', 'start_mm', 'end_mm', 'tmc_count', 'wz_duration',
    'lat', 'lon', 'year',
]].dropna()

# Pin the category levels explicitly; any value outside this list becomes NaN.
df['category'] = pd.Categorical(
    df['category'],
    categories=[
        'Shoulder Closure',
        'Single Lane Closure',
        'Double Lane Closure',
        'Multiple Lane Closure',
    ],
)

# Rank by total_wz_delay (largest first) and expose the rank as an 'index' column.
df2 = (
    df.sort_values('total_wz_delay', ascending=False)
      .reset_index(drop=True)
      .reset_index()
)
In [5]:
# Preview the ranked combined-years frame.
df2.head()
Out[5]:
index total_wz_delay avg_wz_delay max_wz_delay max_queue_duration total_queue_duration num_queue max_queue_length wz_id category ... speed wz_start wz_end start_mm end_mm tmc_count wz_duration lat lon year
0 0 14634.770899 0.656701 49.930747 550.0 6035.0 63.0 6.341538 114914 Single Lane Closure ... 70 2017-09-19 13:00:00 2017-10-03 01:00:00 11.000000 13.00000 16 324.000000 42.484755 -83.235069 2017
1 1 10091.481720 2.323270 41.551808 310.0 1795.0 29.0 5.933702 58124 Shoulder Closure ... 70 2014-01-06 13:00:00 2014-01-12 23:00:00 49.487994 53.06195 37 154.000000 42.341933 -83.076010 2014
2 2 9537.371106 0.859474 64.269638 480.0 2355.0 18.0 5.778698 94261 Double Lane Closure ... 70 2015-11-15 23:00:00 2015-11-23 14:34:00 46.000000 42.00000 31 183.566667 42.304252 -83.110407 2015
3 3 9496.469722 0.875525 64.269638 480.0 2355.0 18.0 5.778698 93622 Double Lane Closure ... 70 2015-11-15 23:00:00 2015-11-23 09:00:00 46.000000 42.00000 31 178.000000 42.304252 -83.110407 2015
4 4 8899.264924 0.551742 73.962062 705.0 1375.0 4.0 7.256744 70000 Single Lane Closure ... 70 2014-08-23 02:00:00 2014-08-25 09:00:00 54.000000 50.00000 6 55.000000 42.879384 -85.328247 2014

5 rows × 23 columns

In [ ]:
 
In [6]:
# Base layer for the combined-years charts: rank position vs. total delay.
base = (
    alt.Chart(df2)
    .mark_circle()
    .encode(alt.X('index:Q'), alt.Y('total_wz_delay:Q'))
    .properties(width=1000, height=300)
)
In [7]:
# Render the combined-years base scatter.
base
Out[7]:
In [8]:
# Colour by category, fix opacity at 0.8, and extend the tooltip with coordinates and year.
chart = (
    base
    .encode(
        alt.Color('category:N'),
        opacity=alt.value(0.8),
        tooltip=['wz_id', 'category', 'county', 'road', 'direction', 'lat', 'lon', 'year'],
    )
    .properties(title='')
)
In [9]:
# Render the coloured combined-years scatter.
chart
Out[9]:
In [10]:
# One 250x250 panel per category; displayed only, `chart` is not reassigned.
(
    chart
    .encode(alt.Column('category:N'))
    .properties(height=250, width=250)
)
Out[10]:
In [11]:
# Grid of 250x250 panels: category across columns, year down rows.
chart_facet = (
    chart
    .encode(alt.Column('category:N'), alt.Row('year:N'))
    .properties(title='Total Work Zone Delay', width=250, height=250)
)
In [12]:
# Render the category-by-year grid.
chart_facet
Out[12]:
In [14]:
# Styled, interactive view of the grid (displayed only): left/bottom axis titles and the
# header title set to None, blue 18pt facet labels, large centred limegreen chart title.
(
    chart_facet
    .configure_axisLeft(title=None)
    .configure_axisBottom(title=None)
    .configure_header(
        labelColor='blue',
        labelFontSize=18,
        title=None,
        titleAnchor='middle',
        titleFont='Courier',
        titleFontSize=34,
    )
    .configure_title(anchor='middle', color='limegreen', fontSize=34)
    .interactive()
)
Out[14]:
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [63]:
import altair as alt
from vega_datasets import data

# Reference choropleth example. Note that this rebinds `source`.
counties = alt.topo_feature(data.us_10m.url, 'counties')
source = data.unemployment.url

# Colour each county shape by the 'rate' looked up from the unemployment table via 'id'.
(
    alt.Chart(counties)
    .mark_geoshape()
    .encode(alt.Color('rate:Q'))
    .transform_lookup(
        lookup='id',
        from_=alt.LookupData(data=source, key='id', fields=['rate']),
    )
    .project(type='albersUsa')
    .properties(width=500, height=300)
)
Out[63]:
In [72]:
from vega_datasets import data
In [157]:
# Peek at five random rows of the unemployment table. The seed is fixed so that
# Restart & Run All shows the same rows every time.
data.unemployment().sample(5, random_state=0)
Out[157]:
id rate
592 17001 0.079
295 8105 0.048
91 2280 0.098
1710 31119 0.042
575 16055 0.087
In [77]:
# TopoJSON data reference for the 'counties' feature of the us_10m file.
counties = alt.topo_feature(url=data.us_10m.url, feature='counties')
In [92]:
# Show the URL stored on the topo_feature object.
counties.url
Out[92]:
'https://vega.github.io/vega-datasets/data/us-10m.json'
In [83]:
# Inspect the dataset description (no output was recorded for this cell).
data.us_10m.description
In [96]:
# pd.read_table(counties.url)
In [85]:
# Show where the us_10m TopoJSON file is hosted.
data.us_10m.url
Out[85]:
'https://vega.github.io/vega-datasets/data/us-10m.json'
In [98]:
# Repeat of the description lookup (again no output recorded).
data.us_10m.description
In [99]:
# Check the dataset's is_local flag; the recorded result is False.
data.us_10m.is_local
Out[99]:
False
In [ ]:
# Call the dataset loader directly (this cell has no execution count in the saved notebook).
data.us_10m()
In [105]:
# Inspect the UrlData object built by topo_feature.
# NOTE(review): the recorded output below shows feature 'state', not 'counties', so it
# does not come from this exact code — re-run the cell to refresh it.
alt.topo_feature(data.us_10m.url, feature='counties')
Out[105]:
UrlData({
  format: TopoDataFormat({
    feature: 'state',
    type: 'topojson'
  }),
  url: 'https://vega.github.io/vega-datasets/data/us-10m.json'
})
In [111]:
# First attempt at a Michigan counties source; the result is only displayed, not stored.
# NOTE(review): this uses the github.com/.../blob/... page URL and feature 'states'; later
# cells switch to the raw.githubusercontent.com URL and feature 'cb_2015_michigan_county_20m'.
alt.topo_feature('https://github.com/deldersveld/topojson/blob/master/countries/us-states/MI-26-michigan-counties.json', feature='states')
Out[111]:
UrlData({
  format: TopoDataFormat({
    feature: 'states',
    type: 'topojson'
  }),
  url: 'https://github.com/deldersveld/topojson/blob/master/countries/us-states/MI-26-michigan-counties.json'
})
In [117]:
# Show the us_10m URL again.
data.us_10m.url
Out[117]:
'https://vega.github.io/vega-datasets/data/us-10m.json'
In [108]:
# Read the airports dataset description (text shown below).
data.airports.description
Out[108]:
'This dataset lists US airports, including airport code, city, state, latitude, and longitude. This dataset is a subset of the data compiled and published at http://ourairports.com/data/, and is in the public domain.'
In [155]:
# Load the airports table and preview it; it carries latitude/longitude columns.
data.airports().head()
Out[155]:
iata name city state country latitude longitude
0 00M Thigpen Bay Springs MS USA 31.953765 -89.234505
1 00R Livingston Municipal Livingston TX USA 30.685861 -95.017928
2 00V Meadow Lake Colorado Springs CO USA 38.945749 -104.569893
3 01G Perry-Warsaw Perry NY USA 42.741347 -78.052081
4 01J Hilliard Airpark Hilliard FL USA 30.688012 -81.905944
In [15]:
# Raw-content URL of the Michigan counties TopoJSON file. The github.com/.../blob/...
# page URL that used to be assigned first was immediately overwritten (dead assignment)
# and points at the repository's web page rather than the raw file, so only this is kept.
url = 'https://raw.githubusercontent.com/deldersveld/topojson/master/countries/us-states/MI-26-michigan-counties.json'
In [16]:
# Confirm which URL is currently bound to `url`.
url
Out[16]:
'https://raw.githubusercontent.com/deldersveld/topojson/master/countries/us-states/MI-26-michigan-counties.json'
In [17]:
# TopoJSON data reference for the Michigan county feature (rebinds `source`); echo it to check.
source = alt.topo_feature(url=url, feature='cb_2015_michigan_county_20m')
source
Out[17]:
UrlData({
  format: TopoDataFormat({
    feature: 'cb_2015_michigan_county_20m',
    type: 'topojson'
  }),
  url: 'https://raw.githubusercontent.com/deldersveld/topojson/master/countries/us-states/MI-26-michigan-counties.json'
})
In [18]:
# Michigan county outlines as a light-grey backdrop with the county name on hover.
# No projection is set here (earlier albersUsa / identity experiments were left disabled).
background = (
    alt.Chart(source)
    .mark_geoshape(fill='lightgray', stroke='white')
    .encode(alt.Tooltip('properties.NAME:N'))
    .properties(width=500, height=500)
)
In [19]:
# Render the county backdrop on its own.
background
Out[19]:
In [20]:
# Work-zone locations as circles positioned by lon/lat and coloured by category.
points = (
    alt.Chart(df2)
    .mark_circle()
    .encode(
        alt.Longitude('lon:Q'),
        alt.Latitude('lat:Q'),
        alt.Color('category:N'),
        tooltip=['wz_id', 'category', 'county', 'road', 'direction', 'lat', 'lon', 'year'],
    )
)

# Overlay the points on the county backdrop.
alt.layer(background, points)
Out[20]:
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [21]:
import altair as alt
from vega_datasets import data

airports = data.airports.url
states = alt.topo_feature(data.us_10m.url, feature='states')

# US states backdrop in the albersUsa projection (rebinds `background`).
background = (
    alt.Chart(states)
    .mark_geoshape(fill='lightgray', stroke='white')
    .properties(width=500, height=300)
    .project('albersUsa')
)

# Work-zone locations positioned by lon/lat and coloured by category.
points = (
    alt.Chart(df2)
    .mark_circle()
    .encode(
        alt.Longitude('lon:Q'),
        alt.Latitude('lat:Q'),
        alt.Color('category:N'),
        tooltip=['wz_id', 'category', 'county', 'road', 'direction', 'lat', 'lon', 'year'],
    )
)

alt.layer(background, points)
Out[21]:
In [111]:
# The work-zone points on their own, with no backdrop layer.
(
    alt.Chart(df2)
    .mark_circle()
    .encode(
        alt.Longitude('lon:Q'),
        alt.Latitude('lat:Q'),
        alt.Color('category:N'),
        tooltip=['wz_id', 'category', 'county', 'road', 'direction', 'lat', 'lon', 'year'],
    )
)
Out[111]:
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [105]:
# Plain x/y scatter of lon vs. lat (no geographic projection), made interactive.
# Rebinds `base`.
base = (
    alt.Chart(df2)
    .mark_circle()
    .encode(
        alt.X('lon:Q'),
        alt.Y('lat:Q'),
        alt.Color('category:N'),
        tooltip=['wz_id', 'category', 'county', 'road', 'direction', 'lat', 'lon', 'year'],
    )
    .interactive()
)
In [106]:
# Render the interactive lon/lat scatter.
base
Out[106]:
In [33]:
import folium
In [64]:
# Fixed-size container figure for the map.
f = folium.Figure(height=800, width=600)

# Centre the map on the mean work-zone coordinate. Lat/lon limits padded by 0.5 around
# the data extent are supplied, with max_bounds left as False.
m = folium.Map(
    location=[df['lat'].mean(), df['lon'].mean()],
    zoom_start=7,
    min_lat=df['lat'].min() - 0.5,
    max_lat=df['lat'].max() + 0.5,
    min_lon=df['lon'].min() - 0.5,
    max_lon=df['lon'].max() + 0.5,
    max_bounds=False,
)

f.add_child(m)
Out[64]:
In [96]:
def add_marker(row, target_map=None):
    """Add a small circle marker for one work-zone row to a folium map.

    Parameters
    ----------
    row : mapping-like (e.g. a pandas Series)
        Must provide 'lat', 'lon' and 'wz_id' entries.
    target_map : folium.Map, optional
        Map to draw on. When omitted, the notebook-level map ``m`` is used, so
        existing ``frame.apply(add_marker, axis=1)`` calls keep working.

    Returns
    -------
    None
        The marker is attached to the map as a side effect; calling this again
        for the same row adds another marker.
    """
    if target_map is None:
        target_map = m
    marker = folium.CircleMarker(
        location=[row['lat'], row['lon']],
        radius=2,
        # Always pass text to the popup, whatever type the id was parsed as.
        popup=str(row['wz_id']),
    )
    marker.add_to(target_map)
In [98]:
row = df.iloc[0]
# Attach a marker for each of the first 500 ranked work zones. apply() is used only
# for its side effect on `m`, so re-running this cell adds the markers again.
df2.iloc[:500].apply(add_marker, axis=1)
m
Out[98]:
In [95]:
# Render the folium map in its current state.
m
Out[95]:
In [ ]:
 
In [16]:
import altair as alt

# Minimal standalone setup: Michigan counties TopoJSON (rebinds `url` and `source`).
url = "https://raw.githubusercontent.com/deldersveld/topojson/master/countries/us-states/MI-26-michigan-counties.json"

source = alt.topo_feature(url=url, feature="cb_2015_michigan_county_20m")
In [17]:
# County shapes with default styling and the county name as tooltip.
(
    alt.Chart(source)
    .mark_geoshape()
    .encode(alt.Tooltip('properties.NAME:N'))
)
Out[17]:
In [ ]:
# Single-point selection that follows the mouse and snaps to the nearest datum;
# with empty='none', nothing counts as selected until the pointer is over the chart.
# NOTE(review): it projects over a field named 'x' — confirm the target chart's data has one.
nearest = alt.selection(
    type='single',
    on='mouseover',
    nearest=True,
    fields=['x'],
    empty='none',
)
In [ ]:
# Echo the selection object (this cell has no execution count in the saved notebook).
nearest
In [ ]: